##########################
##
##	GLOBALS
##
##########################
# Session-wide settings: fixed RNG seed and plain (non-scientific) number printing.
options(scipen = 999)  # Large penalty so numbers print in fixed notation
set.seed(1282017)      # Same value as the literal 01282017; the leading zero is not significant

#setwd('/path/to/replication/')

library(irr)
library(ggplot2)
library(reshape)

##########################
##
##	RAW DATA
##
##########################
# Processed tweet-level data; joined to the manual labels on `id` further down.
tweets <- read.csv(
  file = './Data/02_processedData/c_DonghyeonAlexmerged_Newclassifiers_ShortSpain.csv'
)


##########################
##
##	COMPARING FACE ESTIMATES
##
##########################
# Read the three coders' validation sheets. Strings stay as character and both
# 'NA' and 'N/A' are treated as missing.
two_one <- read.csv(
  './Data/facesValidation/Labeling/Step2_FaceValidation_Bernard.csv',
  stringsAsFactors = FALSE,
  na.strings = c('NA', 'N/A')
)
two_two <- read.csv(
  './Data/facesValidation/Labeling/Step2_FaceValidation_Jack.csv',
  stringsAsFactors = FALSE,
  na.strings = c('NA', 'N/A')
)
two_three <- read.csv(
  './Data/facesValidation/Labeling/Step2_FaceValidation_Jun.csv',
  stringsAsFactors = FALSE,
  na.strings = c('NA', 'N/A')
)

# Jack's sheet has an extra column that is not needed; keep only the columns
# present in Bernard's sheet (in that order) so the sheets can be stacked.
two_two <- two_two[, names(two_one)]

# Stack the three coders' rows into one table of human labels.
manual <- rbind(two_one, two_two, two_three)

# Keep every manually labelled row; tweets without a manual label are dropped.
# Labelled rows with no matching tweet get NA in the tweet columns.
# NOTE(review): if `id` holds very large integers, read.csv will parse it as a
# double and the join could match imprecisely -- confirm the id range.
df <- merge(tweets, manual, by = 'id', all.y = TRUE)


# Function modified from here: http://www.sthda.com/english/wiki/ggplot2-barplots-quick-start-guide-r-software-and-data-visualization
#
# Summarise one column of `data` within the groups defined by one grouping column.
#
# Args:
#   data:       data frame containing the columns named by `varname` and `groupnames`.
#   varname:    name of the column to summarise.
#   groupnames: name (single string) of the grouping column.
#
# Returns:
#   A data frame with one row per group and the columns
#     Binary   - the group value (the grouping column, renamed)
#     mean     - group mean of `varname`, missing values removed
#     sd       - group standard deviation of `varname`, missing values removed
#     count    - number of non-missing `varname` values in the group, i.e. the
#                n that `mean` and `sd` are based on
#     variable - the value of `groupnames`, repeated on every row
#
# Requires the 'plyr' package to be installed; it is called through its
# namespace and is not attached to the search path.
data_summary <- function(data, varname, groupnames){
  # The renaming and labelling below only make sense for one grouping column.
  stopifnot(is.character(groupnames), length(groupnames) == 1L)
  if (!requireNamespace('plyr', quietly = TRUE)) {
    stop("Package 'plyr' is required by data_summary()", call. = FALSE)
  }

  summary_func <- function(x, col){
    values <- x[[col]]
    c(mean = mean(values, na.rm = TRUE),
      sd = sd(values, na.rm = TRUE),
      # Count only the observations that enter mean/sd, so that
      # sd / sqrt(count) is a valid standard error when values are missing.
      count = sum(!is.na(values)))
  }
  data_sum <- plyr::ddply(data, groupnames, .fun = summary_func, varname)

  names(data_sum)[names(data_sum) == groupnames] <- 'Binary'
  data_sum$variable <- groupnames
  data_sum
}

# Per-label summaries of the classifier score, grouped by the matching human label.
pv     <- data_summary(df, 'protest_result.protester_violence', 'Protester_Violence.')
sv     <- data_summary(df, 'protest_result.state_violence', 'State_Violence.')
fire   <- data_summary(df, 'protest_result.fire', 'Fire.')
police <- data_summary(df, 'protest_result.police', 'Police.')

# Stack the four summaries and drop the rows whose human label is missing.
temp <- rbind(pv, sv, fire, police)
temp <- temp[!is.na(temp$Binary), ]

# Standard error of each group mean, from the group's sd and count.
temp$se <- temp$sd / sqrt(temp$count)

# Turn the column names into axis labels: strip the dots, then put each
# underscore-separated word on its own line.
temp$variable <- gsub('_', '\n', gsub('.', '', temp$variable, fixed = TRUE), fixed = TRUE)


### FIG A9
# Dodged bars of the mean classifier estimate per label, split by the human
# label, with error bars of one standard error either side of the mean.
ggplot(temp, aes(x = variable, y = mean, fill = as.factor(Binary))) +
  geom_bar(stat = 'identity', position = position_dodge()) +
  geom_errorbar(
    aes(ymin = mean - se, ymax = mean + se),
    width = .2,
    position = position_dodge(.9)
  ) +
  scale_fill_manual(labels = c('No', 'Yes'), values = c('red', 'darkgreen')) +
  xlab('') +
  ylab('Mean CNN Estimate') +
  theme_classic() +
  labs(fill = 'Human Label') +
  theme(text = element_text(size = 14))
ggsave(
  filename = './Figures/classifierValidation_barchart.jpg',
  plot = last_plot(),
  width = 5,
  height = 4,
  units = 'in'
)


### FIG A10
# Scatter of the human coder's face count against the automated face count,
# with a loess smoother and a dotted y = x reference line.
ggplot(df, aes(x = totalFaces, y = Number_of_faces)) +
  geom_point() +
  stat_smooth(method = 'loess') +
  # geom_abline() takes slope/intercept. The base-graphics names a/b are
  # ignored with a warning, so state the identity line explicitly rather
  # than relying on the default.
  geom_abline(slope = 1, intercept = 0, linetype = 'dotted') +
  theme_classic() +
  xlab('FairFace Count') +
  ylab('Human Coder Face Count')
ggsave(
  filename = './Figures/faceValidation_loess.jpg',
  plot = last_plot(),
  width = 5,
  height = 4,
  units = 'in'
)

